'''
Created on 16 nov. 2013

@author: ivan
'''

import csv as csv 
import numpy as np

# First option: read the file with the csv module.
# Binary mode ('rb') is kept because this script targets Python 2, whose csv
# module expects the file to be opened with the 'b' flag. The `with` block
# guarantees the handle is closed once all rows have been consumed.
with open('data/train.csv', 'rb') as csv_file:
    csv_file_object = csv.reader(csv_file)
    header = next(csv_file_object)  # consume the first line, which is the header
    print(header)
    # Collect every remaining row; each field is still a string at this point.
    data = list(csv_file_object)
# Convert the list of rows to an array. Every item is still a string here;
# numeric columns have to be cast explicitly before doing arithmetic.
data = np.array(data)

# Overall figures: the number of entries in column 0 is used as the passenger
# count and the sum of column 1 as the number of survivors (presumably a 0/1
# flag per row -- confirm against the data file).
# The builtin `float` replaces the `np.float` alias, which NumPy deprecated in
# 1.20 and removed in 1.24; both name the same type, so results are unchanged.
number_passengers = np.size(data[:, 0].astype(float))
number_survived = np.sum(data[:, 1].astype(float))
proportion_survivors = number_survived / number_passengers

# Boolean row masks derived from column 4: rows equal to 'female' versus
# every other row.
women_only_stats = data[:, 4] == 'female'
men_only_stats = data[:, 4] != 'female'

# Column 1 values of each group, cast from string to float.
women_onboard = data[women_only_stats, 1].astype(float)
men_onboard = data[men_only_stats, 1].astype(float)
proportion_women_survived = np.sum(women_onboard) / np.size(women_onboard)
proportion_men_survived = np.sum(men_onboard) / np.size(men_onboard)

# Single-argument parenthesised prints behave the same on Python 2 and 3.
print('Proportion of women who survived is %s' % proportion_women_survived)
print('Proportion of men who survived is %s' % proportion_men_survived)


# Second option to read the file, taken from
# https://www.kaggle.com/wiki/GettingStartedWithPythonForDataScience
# Create the training set, skipping the header row with [1:]. The file is
# opened in a `with` block so the handle is closed as soon as parsing is done.
with open('data/train.csv', 'r') as train_file:
    dataset = np.genfromtxt(train_file, delimiter=',', dtype='f8')[1:]
print(dataset)
# The first column of each row is taken as the target, the remaining columns
# as the features.
# NOTE(review): the first option above sums column 1 as the survival count,
# while column 0 is used as the target here -- confirm which layout the
# file actually has.
target = [x[0] for x in dataset]
train = [x[1:] for x in dataset]
# The test set would be loaded the same way:
#test = np.genfromtxt(open('Data/test.csv','r'), delimiter=',', dtype='f8')[1:]





